10-Minute pandas Training

Adapted from the official "10 Minutes to pandas" tutorial: http://pandas.pydata.org/pandas-docs/dev/10min.html

Setup



In [45]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Generating sample data: a Series and a pandas DataFrame filled with random values



In [63]:

    
# Fixed seed so the random frame, and every output derived from it, is
# reproducible under Restart Kernel -> Run All. A local generator is used so
# the global NumPy random state is left untouched.
SEED = 0
rng = np.random.RandomState(SEED)

# Small Series containing one missing value (np.nan).
s = pd.Series([1, 3, 5, np.nan, 6, 8])
# Six consecutive daily timestamps starting 2013-01-01; used as the row index.
dates = pd.date_range('20130101', periods=6)
# 6x5 frame of standard-normal draws, indexed by date, columns labelled A-E.
df = pd.DataFrame(rng.randn(6, 5), index=dates, columns=list('ABCDE'))



In [62]:

    
# Scratch look at a raw 6x4 block of standard-normal samples (not stored).
np.random.standard_normal((6, 4))









    Out[62]:





array([[-0.72141978,  1.75647938, -0.84297956, -1.87490759],
       [ 0.61284124,  0.88152499,  0.23692212, -0.46196475],
       [ 0.49427992,  0.83530742, -1.06899914, -0.40358203],
       [ 1.26489497, -1.06422262, -1.55284967,  1.16017867],
       [-0.2598227 ,  0.86774058, -0.64080989,  0.75744095],
       [-0.72803805,  0.72472441,  0.81294184,  0.24446359]])



In [30]:

    
# Build a frame from a dict of columns with mixed types. The scalar entries
# (A, B, E) are repeated for every row of the four-element columns C and D.
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': 'foo',
    }
)



In [31]:

    
# Render df2 to inspect the mixed-type columns row by row.
df2









    Out[31]:






  
    
      
      A
      B
      C
      D
      E
    
  
  
    
      0
       1
      2013-01-02 00:00:00
       1
       3
       foo
    
    
      1
       1
      2013-01-02 00:00:00
       1
       3
       foo
    
    
      2
       1
      2013-01-02 00:00:00
       1
       3
       foo
    
    
      3
       1
      2013-01-02 00:00:00
       1
       3
       foo
    
  

4 rows × 5 columns



In [32]:

    
# List the dtype pandas assigned to each column of df2.
df2.dtypes









    Out[32]:





A           float64
B    datetime64[ns]
C           float32
D             int32
E            object
dtype: object



In [64]:

    
# Preview the first five rows instead of rendering the whole frame.
df.head(n=5)









    Out[64]:






  
    
      
      A
      B
      C
      D
      E
    
  
  
    
      2013-01-01
       1.100527
      -0.381699
      -0.451535
       0.713584
      -1.293972
    
    
      2013-01-02
      -0.391748
      -1.480627
       0.590120
      -0.640268
      -0.440093
    
    
      2013-01-03
       0.661144
       0.361374
      -0.882714
       1.120732
       0.276473
    
    
      2013-01-04
      -1.130035
      -0.168796
       0.072885
       0.875275
       0.575093
    
    
      2013-01-05
       2.405335
      -0.583955
      -0.690658
      -2.137181
      -1.716164
    
  

5 rows × 5 columns



In [37]:

    
# Inspect the row index (the date range passed as index= when df was built).
df.index









    Out[37]:





<class 'pandas.tseries.index.DatetimeIndex'>
[2013-01-01 00:00:00, ..., 2013-01-06 00:00:00]
Length: 6, Freq: D, Timezone: None



In [41]:

    
# Top-left element of the underlying NumPy array, read with a single 2-D index
# rather than two chained 1-D lookups.
df.values[0, 0]









    Out[41]:





0.26366085386647958



In [42]:

    
# Swap rows and columns: dates become column labels, A.. become row labels.
df.transpose()









    Out[42]:






  
    
      
      2013-01-01 00:00:00
      2013-01-02 00:00:00
      2013-01-03 00:00:00
      2013-01-04 00:00:00
      2013-01-05 00:00:00
      2013-01-06 00:00:00
    
  
  
    
      A
       0.263661
      -1.148435
       1.753792
       0.771710
      -0.456610
      -0.375286
    
    
      B
       1.263971
      -0.750341
      -0.436259
       1.207909
       0.944913
      -2.548341
    
    
      C
      -0.039726
      -0.228849
       0.566032
       1.505498
       0.573583
       0.647772
    
    
      D
       0.352130
       0.991504
       0.392104
      -1.446948
      -1.511025
       2.033515
    
  

4 rows × 6 columns



In [44]:

    
# Order the rows by the values in column B, largest first.
# DataFrame.sort(columns=...) has been removed from pandas and raises
# AttributeError on current versions; sort_values(by=...) is its replacement.
df.sort_values(by='B', ascending=False)









    Out[44]:






  
    
      
      A
      B
      C
      D
    
  
  
    
      2013-01-01
       0.263661
       1.263971
      -0.039726
       0.352130
    
    
      2013-01-04
       0.771710
       1.207909
       1.505498
      -1.446948
    
    
      2013-01-05
      -0.456610
       0.944913
       0.573583
      -1.511025
    
    
      2013-01-03
       1.753792
      -0.436259
       0.566032
       0.392104
    
    
      2013-01-02
      -1.148435
      -0.750341
      -0.228849
       0.991504
    
    
      2013-01-06
      -0.375286
      -2.548341
       0.647772
       2.033515
    
  

6 rows × 4 columns



In [48]:

    
# Select a single column by label; the result is a Series named 'A'.
df['A']









    Out[48]:





2013-01-01    0.263661
2013-01-02   -1.148435
2013-01-03    1.753792
2013-01-04    0.771710
2013-01-05   -0.456610
2013-01-06   -0.375286
Freq: D, Name: A, dtype: float64



In [51]:

    
# Label-based selection: every row, columns A and B only.
df.loc[:, list('AB')]









    Out[51]:






  
    
      
      A
      B
    
  
  
    
      2013-01-01
       0.263661
       1.263971
    
    
      2013-01-02
      -1.148435
      -0.750341
    
    
      2013-01-03
       1.753792
      -0.436259
    
    
      2013-01-04
       0.771710
       1.207909
    
    
      2013-01-05
      -0.456610
       0.944913
    
    
      2013-01-06
      -0.375286
      -2.548341
    
  

6 rows × 2 columns



In [52]:

    
# Label-based scalar lookup: the value at the first date, column A.
df.at[dates[0],'A']









    Out[52]:





0.26366085386647958



In [53]:

    
# Position-based slice: rows 3-4 and the first two columns (end-exclusive).
df.iloc[3:5, :2]









    Out[53]:






  
    
      
      A
      B
    
  
  
    
      2013-01-04
       0.77171
       1.207909
    
    
      2013-01-05
      -0.45661
       0.944913
    
  

2 rows × 2 columns



In [54]:









    Out[54]:






  
    
      
      A
      B
      C
      D
    
  
  
    
      2013-01-01
       0.263661
       1.263971
      -0.039726
       0.352130
    
    
      2013-01-02
      -1.148435
      -0.750341
      -0.228849
       0.991504
    
    
      2013-01-03
       1.753792
      -0.436259
       0.566032
       0.392104
    
    
      2013-01-04
       0.771710
       1.207909
       1.505498
      -1.446948
    
    
      2013-01-05
      -0.456610
       0.944913
       0.573583
      -1.511025
    
    
      2013-01-06
      -0.375286
      -2.548341
       0.647772
       2.033515
    
  

6 rows × 4 columns



In [57]:

    
# Position-based row slice: rows 1-2 with all columns.
df.iloc[1:3]









    Out[57]:






  
    
      
      A
      B
      C
      D
    
  
  
    
      2013-01-02
      -1.148435
      -0.750341
      -0.228849
       0.991504
    
    
      2013-01-03
       1.753792
      -0.436259
       0.566032
       0.392104
    
  

2 rows × 4 columns



In [58]:

    
# Position-based scalar lookup (row 1, column 1). pandas documents .iat as the
# fast accessor when only a single value is needed.
df.iat[1,1]









    Out[58]:





-0.75034058387714442



In [ ]:

	A	B	C	D	E
0	1	2013-01-02 00:00:00	1	3	foo
1	1	2013-01-02 00:00:00	1	3	foo
2	1	2013-01-02 00:00:00	1	3	foo
3	1	2013-01-02 00:00:00	1	3	foo

	A	B	C	D	E
2013-01-01	1.100527	-0.381699	-0.451535	0.713584	-1.293972
2013-01-02	-0.391748	-1.480627	0.590120	-0.640268	-0.440093
2013-01-03	0.661144	0.361374	-0.882714	1.120732	0.276473
2013-01-04	-1.130035	-0.168796	0.072885	0.875275	0.575093
2013-01-05	2.405335	-0.583955	-0.690658	-2.137181	-1.716164

	2013-01-01 00:00:00	2013-01-02 00:00:00	2013-01-03 00:00:00	2013-01-04 00:00:00	2013-01-05 00:00:00	2013-01-06 00:00:00
A	0.263661	-1.148435	1.753792	0.771710	-0.456610	-0.375286
B	1.263971	-0.750341	-0.436259	1.207909	0.944913	-2.548341
C	-0.039726	-0.228849	0.566032	1.505498	0.573583	0.647772
D	0.352130	0.991504	0.392104	-1.446948	-1.511025	2.033515